Go back to the Preprocessing page. This link might be useful to keep track of the files created during the preprocessing.

Let us set some global options for all code chunks in this document.

# Global knitr defaults applied to every code chunk in this document.
knitr::opts_chunk$set(
  # --- What is executed and displayed ---
  eval = TRUE,      # run the code in each chunk
  echo = TRUE,      # print the chunk source in the output
  include = TRUE,   # keep both the code and its results in the output
  collapse = FALSE, # keep source and output in separate blocks
  # --- Conditions raised by the code ---
  message = FALSE,  # hide messages
  warning = FALSE,  # hide warnings
  error = TRUE,     # show errors in the output instead of aborting the render
  # --- Rendering ---
  cache = FALSE,    # caching is disabled: chunks are re-run on every render
  # --- Figures ---
  fig.align = "center",
  out.width = "100%",
  retina = 2
)

# Start from a clean slate: drop every non-hidden object in the current
# environment (attached packages and options are not affected), then fix the
# RNG seed for reproducibility.
rm(list = ls())
set.seed(1982)

1 Import libraries

# Install R-INLA package
# install.packages("INLA",repos = c(getOption("repos"),INLA ="https://inla.r-inla-download.org/R/testing"), dep = TRUE)
# Update R-INLA package
# inla.upgrade(testing = TRUE)
# Install inlabru package
# remotes::install_github("inlabru-org/inlabru", ref = "devel")
# Install rSPDE package
# remotes::install_github("davidbolin/rspde", ref = "devel")
# Install MetricGraph package
# remotes::install_github("davidbolin/metricgraph", ref = "devel")

library(INLA)
library(inlabru)
library(rSPDE)
library(MetricGraph)

library(plotly)
library(dplyr)
library(tidyr)
library(sf)
library(mapview)
library(listviewer)
library(jsonlite)
library(ggplot2)

library(here) # here() builds paths relative to the project root (not the home directory)
library(rmarkdown)

rm(list = ls()) # Clear the workspace: removes all non-hidden objects; attached packages stay loaded
set.seed(1982) # Set seed for reproducibility

2 Load the graph and data

# Restore the saved graph construction into the current environment
# (presumably provides `sf_graph`, which is used further down - TODO confirm)
here("data_files/graph_construction_on_27JUN2024_FRC013456.RData") |>
  load()
# Restore the saved observations into the current environment
# (presumably provides `df`, which is used further down - TODO confirm)
here("data_files/day7142128hour13noconsecutivezeroes.RData") |>
  load()

2.1 Explore the data

# Show the first five rows as a paged table
paged_table(head(df, 5))
# Number of rows and columns
dim(df)
## [1] 149704      5
# Plot the observations coloured by speed, one panel per day stacked in a
# single column
ggplot(data = df) +
  geom_sf(mapping = aes(color = speed)) +
  labs(title = "Speed by Day", color = "Speed") +
  facet_wrap(~day, ncol = 1) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title

3 Add the speed observations to the graph

# Rectangular window used to crop the observations (the original note says it
# is also meant for cutting the network; that step is not shown here).
# Each row is one corner given as (x, y) in the CRS of `df`; st_cast() turns
# the four points into a polygon.
polygon <- rbind(
  c(-122.53000, 37.69702),
  c(-122.37000, 37.69702),
  c(-122.37000, 37.82600),
  c(-122.53000, 37.82600)
) |>
  st_multipoint() |>
  st_cast("POLYGON") |>
  st_sfc(crs = st_crs(df)) # `df` must already be loaded: it supplies the CRS

# Keep only the observations that lie within the window
data.reduced <- df |>
  st_filter(polygon, .predicate = st_within)

# Attach the filtered observations to the graph, grouped by the `day` column
sf_graph$add_observations(
  data = data.reduced,
  group = "day",
  tolerance = 0.02, # 20m according to the original note - units not verifiable here
  duplicated_strategy = "jitter"
)

# Retrieve the observations as stored on the graph (that is, in graph
# coordinates)
data_on_graph <- sf_graph$get_data()

# Persist the graph-processed observations to disk
save(
  data_on_graph,
  file = here("data_files/data_day7142128_hour13_with_no_consecutive_zeros_27JUN2024_FRC013456_graph_processed.RData")
)

3.1 Explore the data

# Show the first five rows of the graph-processed data as a paged table
paged_table(head(data_on_graph, 5))
# Number of rows and columns
dim(data_on_graph)
## [1] 137267     10
# Scatter the observations at their stored coordinates, coloured by speed,
# one panel per day stacked in a single column
ggplot(data = data_on_graph) +
  geom_point(mapping = aes(x = .coord_x, y = .coord_y, color = speed)) +
  labs(title = "Speed by Day", color = "Speed") +
  facet_wrap(~day, ncol = 1) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title